
<!DOCTYPE HTML>
<html lang="zh-Hans">
    <head>
        <meta charset="UTF-8">
        <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
        <title>Dataset操作示例 · 基于spark的快速离线分析框架使用文档</title>
        <meta http-equiv="X-UA-Compatible" content="IE=edge" />
        <meta name="description" content="">
        <meta name="generator" content="GitBook 3.2.3">
        <meta name="author" content="shengshi_feiyang@yeah.net">
        
        
    
    <link rel="stylesheet" href="../gitbook/style.css">

    
            
                
                <link rel="stylesheet" href="../gitbook/gitbook-plugin-back-to-top-button/plugin.css">
                
            
                
                <link rel="stylesheet" href="../gitbook/gitbook-plugin-cuav-chapters/cuav-chapters.css">
                
            
                
                <link rel="stylesheet" href="../gitbook/gitbook-plugin-splitter/splitter.css">
                
            
                
                <link rel="stylesheet" href="../gitbook/gitbook-plugin-multipart/multipart.css">
                
            
                
                <link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
                
            
                
                <link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
                
            
                
                <link rel="stylesheet" href="../gitbook/gitbook-plugin-theme-fexa/fexa.css">
                
            
        

    

    
        
    
        
    
        
    
        
    
        
    
        
    

        
    
    
    <meta name="HandheldFriendly" content="true"/>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black">
    <link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
    <link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">

    
    <link rel="next" href="../spark-code/" />
    
    
    <link rel="prev" href="java-rdd.html" />
    

    </head>
    <body>
        
<div class="book">
	<div class="header-inner">
		<!-- LOGO -->
		<div class="logo"></div>
		<span class="title"></span>

		<!-- Search -->
		
<div id="book-search-input" role="search">
    <input type="text" placeholder="Type to search" aria-label="搜索" />
</div>


		<!-- Nav -->
		<ul class="header-nav">
			<li>
				<a href="https://gitee.com/shengshifeiyang/easy-spark" target="_blank" rel="noopener">代码仓库</a>
			</li>
		</ul>
	</div>

	<div class="book-summary">
		<div class="book-summary-title">文档目录</div>
		
		
		<nav role="navigation">
			


<ul class="summary">
    
    

    

    
        
        
    
        <li class="chapter " data-level="1.1" data-path="../">
            
                <a href="../">
            
                    
                    Introduction
            
                </a>
            

            
        </li>
    
        <li class="chapter " data-level="1.2" data-path="../develop-guide/">
            
                <a href="../develop-guide/">
            
                    
                    开发指导
            
                </a>
            

            
            <ul class="articles">
                
    
        <li class="chapter " data-level="1.2.1" data-path="../develop-guide/geo-json-app.html">
            
                <a href="../develop-guide/geo-json-app.html">
            
                    
                    文件读取示例hello-world
            
                </a>
            

            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="1.3" data-path="./">
            
                <a href="./">
            
                    
                    spark基础示例
            
                </a>
            

            
            <ul class="articles">
                
    
        <li class="chapter " data-level="1.3.1" data-path="java-rdd.html">
            
                <a href="java-rdd.html">
            
                    
                    JavaRDD操作示例
            
                </a>
            

            
        </li>
    
        <li class="chapter active" data-level="1.3.2" data-path="dataset.html">
            
                <a href="dataset.html">
            
                    
                    Dataset操作示例
            
                </a>
            

            
        </li>
    

            </ul>
            
        </li>
    
        <li class="chapter " data-level="1.4" data-path="../spark-code/">
            
                <a href="../spark-code/">
            
                    
                    spark常用代码片段
            
                </a>
            

            
            <ul class="articles">
                
    
        <li class="chapter " data-level="1.4.1" data-path="../spark-code/map.html">
            
                <a href="../spark-code/map.html">
            
                    
                    map片段
            
                </a>
            

            
        </li>
    
        <li class="chapter " data-level="1.4.2" data-path="../spark-code/flatMap.html">
            
                <a href="../spark-code/flatMap.html">
            
                    
                    flatMap片段
            
                </a>
            

            
        </li>
    
        <li class="chapter " data-level="1.4.3" data-path="../spark-code/wrapped-array.html">
            
                <a href="../spark-code/wrapped-array.html">
            
                    
                    WrappedArray解析片段
            
                </a>
            

            
        </li>
    

            </ul>
            
        </li>
    

    

    <li class="divider"></li>

    <li>
        <a href="https://www.gitbook.com" target="_blank" rel="noopener" class="gitbook-link">
            Published with GitBook
        </a>
    </li>
</ul>


		</nav>
		
		
	</div>

	<!-- Content nav -->
	<div class="book-anchor">
		<div class="book-anchor-title">在这篇文章中:</div>
		<div class="book-anchor-body">

		</div>
	</div>

	<div class="book-body">
		
		<div class="body-inner">
			
			

<div class="book-header" role="navigation">
    

    <!-- Title -->
    <h1>
        <i class="fa fa-circle-o-notch fa-spin"></i>
        <a href=".." >Dataset操作示例</a>
    </h1>
</div>




			<div class="page-wrapper" tabindex="-1" role="main">
				<div class="page-inner">
					
<div id="book-search-results">
    <div class="search-noresults">
    
					<section class="normal markdown-section">
						
						<h1 id="dataset&#x64CD;&#x4F5C;&#x793A;&#x4F8B;">Dataset&#x64CD;&#x4F5C;&#x793A;&#x4F8B;</h1>
<p>Dataset&#x5F39;&#x6027;&#x6570;&#x636E;&#x96C6;&#xFF0C;&#x7531;schema&#x4E0E;data&#x6784;&#x6210;&#x3002;schema&#x53EF;&#x4EE5;&#x7406;&#x89E3;&#x6210;&#x8868;&#x5B57;&#x6BB5;&#x4E0E;&#x5B57;&#x6BB5;&#x7C7B;&#x578B;&#xFF0C;data&#x5219;&#x662F;&#x6BCF;&#x884C;&#x6570;&#x636E;&#x3002;</p>
<p><a href="https://gitee.com/shengshifeiyang/easy-spark/tree/master/src/main/java/com/dukk/espark/apps/DatasetApp.java" target="_blank" rel="noopener">&#x4EE3;&#x7801;&#x7C7B;: DatasetApp</a></p>
<h2 id="&#x793A;&#x4F8B;&#x4E00;-&#x52A0;&#x8F7D;&#x6570;&#x636E;">&#x793A;&#x4F8B;&#x4E00; &#x52A0;&#x8F7D;&#x6570;&#x636E;</h2>
<pre><code> //&#x793A;&#x4F8B;&#x4E00; &#x52A0;&#x8F7D;&#x5168;&#x56FD;&#x7701;&#x4EFD; json&#x6570;&#x636E;
 Dataset&lt;Row&gt; jsonDataset = sparkSession.read().json(configKit.getEsparkFilePath());
 jsonDataset.printSchema();
 jsonDataset.show();
//        +--------------------+-----------+-----------------+
//        |            features|       name|             type|
//        +--------------------+-----------+-----------------+
//       |[[[[[[[116.812128...|&#x5168;&#x56FD;_origin|FeatureCollection|
//      +--------------------+-----------+-----------------+
</code></pre><h2 id="&#x793A;&#x4F8B;&#x4E8C;-select-&#x64CD;&#x4F5C;">&#x793A;&#x4F8B;&#x4E8C; select &#x64CD;&#x4F5C;</h2>
<pre><code>   //&#x793A;&#x4F8B;&#x4E8C; select &#x64CD;&#x4F5C;
   Dataset&lt;Row&gt; featuresDataset = jsonDataset.select(&quot;features&quot;);
   featuresDataset.printSchema();
   featuresDataset.show();
</code></pre><h2 id="&#x793A;&#x4F8B;&#x4E09;-flatmap&#x64CD;&#x4F5C;">&#x793A;&#x4F8B;&#x4E09; flatMap&#x64CD;&#x4F5C;</h2>
<pre><code> //&#x89E3;&#x6790;features&#x5168;&#x56FD;&#x7701;&#x4EFD;&#x6570;&#x636E;,&#x5C06;features.properties.name &#x89E3;&#x6790;&#x51FA;&#x6765;
Dataset&lt;Province&gt; provinceDataset = featuresDataset.flatMap((FlatMapFunction&lt;Row, Province&gt;)row-&gt;{

      List&lt;Province&gt; provinceList = new ArrayList&lt;&gt;();
      WrappedArray features = (WrappedArray)row.getAs(&quot;features&quot;);

      for(int i=0; i&lt;features.size(); i++){

           //row &#x64CD;&#x4F5C;
           Row data = (Row)features.apply(i);
           //&#x83B7;&#x53D6;row scheme
           StructType structType = data.schema();
           StructField structFields[] = structType.fields();
           // ... &#x64CD;&#x4F5C;scheme

           Row properties = data.getAs(&quot;properties&quot;);
           String name = properties.getAs(&quot;name&quot;);

           Province province = new Province();
           province.setProvince(name);
           provinceList.add(province);
       }

     return provinceList.iterator();
}, Encoders.bean(Province.class));

provinceDataset.show();
//+--------------+
//|      province|
//+--------------+
//|        &#x5317;&#x4EAC;&#x5E02;|
//|        &#x5929;&#x6D25;&#x5E02;|
//|        &#x6CB3;&#x5317;&#x7701;|
//|        &#x5C71;&#x897F;&#x7701;|
//|  &#x5185;&#x8499;&#x53E4;&#x81EA;&#x6CBB;&#x533A;|
//|        &#x8FBD;&#x5B81;&#x7701;|
//|        &#x5409;&#x6797;&#x7701;|
//|      &#x9ED1;&#x9F99;&#x6C5F;&#x7701;|
//|        &#x4E0A;&#x6D77;&#x5E02;|
//|        &#x6C5F;&#x82CF;&#x7701;|
//|        &#x6D59;&#x6C5F;&#x7701;|
//|        &#x5B89;&#x5FBD;&#x7701;|
//|        &#x798F;&#x5EFA;&#x7701;|
//|        &#x6C5F;&#x897F;&#x7701;|
//|        &#x5C71;&#x4E1C;&#x7701;|
//|        &#x6CB3;&#x5357;&#x7701;|
//|        &#x6E56;&#x5317;&#x7701;|
//|        &#x6E56;&#x5357;&#x7701;|
//|        &#x5E7F;&#x4E1C;&#x7701;|
//|&#x5E7F;&#x897F;&#x58EE;&#x65CF;&#x81EA;&#x6CBB;&#x533A;|
//+--------------+
//only showing top 20 rows
</code></pre>
<script>
// popup plugin: a click on any IMG element opens that image's URL in a separate window.
console.log("plugin-popup....");
// Bind through addEventListener instead of assigning document.onclick, so this
// handler neither replaces nor is replaced by another click handler set on that
// property. The guard flag keeps the binding single even if this fragment is
// evaluated more than once.
if (!window.__pluginPopupBound) {
    window.__pluginPopupBound = true;
    document.addEventListener("click", function (e) {
        var target = e.target;
        if (target && target.tagName === "IMG") {
            // The image URL is passed both as the address and as the window name,
            // exactly as the original handler did.
            window.open(target.src, target.src);
        }
    });
}
</script><style>img{cursor:pointer}</style>
						
					</section>
					
    </div>
    <div class="search-results">
        <div class="has-results">
            
            <h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
            <ul class="search-results-list"></ul>
            
        </div>
        <div class="no-results">
            
            <h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
            
        </div>
    </div>
</div>

				</div>
			</div>
			
		</div>

		
		
		<a href="java-rdd.html" class="navigation navigation-prev " aria-label="Previous page: JavaRDD操作示例">
			<i class="fa fa-angle-left"></i>
		</a>
		
		
		<a href="../spark-code/" class="navigation navigation-next " aria-label="Next page: spark常用代码片段">
			<i class="fa fa-angle-right"></i>
		</a>
		
		
		
	</div>
	<script>
		// Page bootstrap: queue a callback that hands this page's metadata and the
		// book configuration to gitbook.page.hasChanged. If `gitbook` is not defined
		// yet, a plain array stands in and collects the callback.
		// The payload must stay on a single line: a raw line break inside one of its
		// string literals (previously inside "variables") is a JavaScript syntax error
		// and prevents the whole script from running.
		var gitbook = gitbook || [];
		gitbook.push(function() {
			gitbook.page.hasChanged({"page":{"title":"Dataset操作示例","level":"1.3.2","depth":2,"next":{"title":"spark常用代码片段","level":"1.4","depth":1,"path":"spark-code/README.md","ref":"spark-code/README.md","articles":[{"title":"map片段","level":"1.4.1","depth":2,"path":"spark-code/map.md","ref":"spark-code/map.md","articles":[]},{"title":"flatMap片段","level":"1.4.2","depth":2,"path":"spark-code/flatMap.md","ref":"spark-code/flatMap.md","articles":[]},{"title":"WrappedArray解析片段","level":"1.4.3","depth":2,"path":"spark-code/wrapped-array.md","ref":"spark-code/wrapped-array.md","articles":[]}]},"previous":{"title":"JavaRDD操作示例","level":"1.3.1","depth":2,"path":"spark-base/java-rdd.md","ref":"spark-base/java-rdd.md","articles":[]},"dir":"ltr"},"config":{"plugins":["-sharing","-fontsettings","back-to-top-button","copy-code-button","cuav-chapters","heading-anchors","theme-fexa","popup","splitter","multipart@0.3.0"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"splitter":{},"search":{},"cuav-chapters":{"summaryMaxSize":20,"useLimitExpanded":false},"popup":{},"multipart":{},"theme-fexa":{"search-placeholder":"输入关键字搜索","logo":"./logo.png","favicon":"./favicon.ico"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"heading-anchors":{},"highlight":{},"back-to-top-button":{},"copy-code-button":{},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"theme":"default","author":"shengshi_feiyang@yeah.net","pdf":{"pageNumbers":true,"fontSize":18,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":30,"left":30,"top":30,"bottom":50}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{"themeFexa":{"nav":[{"url":"https://gitee.com/shengshifeiyang/easy-spark","target":"_blank","name":"代码仓库"}]}},"title":"基于spark的快速离线分析框架使用文档","gitbook":"*"},"file":{"path":"spark-base/dataset.md","mtime":"2020-11-11T07:38:21.665Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2020-11-11T07:40:22.004Z"},"basePath":"..","book":{"language":""}});
		});
	</script>
</div>

        
    <script src="../gitbook/gitbook.js"></script>
    <script src="../gitbook/theme.js"></script>
    
        
        <script src="../gitbook/gitbook-plugin-back-to-top-button/plugin.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-copy-code-button/toggle.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-cuav-chapters/cuav-chapters.js"></script>
        
    
        
        <script src="https://cdnjs.cloudflare.com/ajax/libs/anchor-js/1.2.1/anchor.min.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-heading-anchors/anchor-style.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-splitter/splitter.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-search/search.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
        
    
        
        <script src="../gitbook/gitbook-plugin-theme-fexa/fexa.js"></script>
        
    

    </body>
</html>

